In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
virginia = pd.read_excel("Virginia_EV.xlsx")
In [3]:
virginia.head()
Out[3]:
Fuel Type Code City State ZIP EV Level2 EVSE Num EV DC Fast Count EV Network Geocode Status Latitude Longitude ... ID Updated At Owner Type Code Open Date EV Connector Types Country Groups With Access Code (French) Access Code Facility Type EV Pricing
0 ELEC Floyd VA 24091 1.0 NaN Non-Networked 200-9 36.910575 -80.317360 ... 39514 2022-04-28 20:01:31 UTC P 2011-06-01 J1772 US Public public HOTEL Free for guests; $10 for non-guests
1 ELEC Richmond VA 23284 2.0 NaN Non-Networked GPS 37.551409 -77.452330 ... 39574 2021-03-11 23:22:17 UTC SG 2011-05-15 J1772 US Public public COLLEGE_CAMPUS Free
2 ELEC Richmond VA 23298 2.0 NaN Non-Networked GPS 37.543387 -77.429530 ... 39575 2022-02-10 19:42:29 UTC SG 2011-05-15 J1772 US Privé private COLLEGE_CAMPUS NaN
3 ELEC Alexandria VA 22304 3.0 1.0 Non-Networked 200-9 38.809675 -77.122192 ... 39763 2022-03-07 19:49:53 UTC P 2011-03-15 CHADEMO J1772 J1772COMBO US Public - Appeler à l'avance public CAR_DEALER Free
4 ELEC Chantilly VA 20151 2.0 1.0 Non-Networked 200-9 38.899751 -77.460168 ... 39764 2022-03-07 19:49:53 UTC P 2011-03-15 CHADEMO J1772 US Public - Appeler à l'avance public CAR_DEALER Free

5 rows × 21 columns

In [4]:
#pip install geopandas 

### Geo pandas is a python library handle shape files.

Let's take a look¶

In [5]:
import geopandas as gpd
import matplotlib.pyplot as plt
import plotly.express as px

######################################### Load Virginia shapefile   ######################################################

#################### Link to shape file:  https://www.naturalearthdata.com/downloads/110m-cultural-vectors/110m-admin-1-states-provinces/

virginia_gpd = gpd.read_file('C:\\Users\\Harinath\\Downloads\\ne_110m_populated_places\\ne_110m_populated_places.shp')
virginia = pd.read_excel("Virginia_EV.xlsx")


###################################### Plot EV stations in Virginia using plotly ##############################################
fig = px.scatter_mapbox(virginia, lat='Latitude', lon='Longitude', color='EV Network',
                        color_discrete_map={'Electrify America': 'red', 'Other Providers': 'blue'},
                        hover_data={'EV Network': True},
                        mapbox_style='carto-positron', zoom=6, center={'lat': 38.0037, 'lon': -79.4588})
fig.update_layout(title='EV Stations in Virginia', margin={"r": 0, "t": 30, "l": 0, "b": 0})
fig.show()
In [6]:
virginia.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1210 entries, 0 to 1209
Data columns (total 21 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   Fuel Type Code                    1210 non-null   object        
 1   City                              1210 non-null   object        
 2   State                             1210 non-null   object        
 3   ZIP                               1210 non-null   object        
 4   EV Level2 EVSE Num                1051 non-null   float64       
 5   EV DC Fast Count                  205 non-null    float64       
 6   EV Network                        1210 non-null   object        
 7   Geocode Status                    1210 non-null   object        
 8   Latitude                          1210 non-null   float64       
 9   Longitude                         1210 non-null   float64       
 10  Date Last Confirmed               1208 non-null   datetime64[ns]
 11  ID                                1210 non-null   int64         
 12  Updated At                        1210 non-null   object        
 13  Owner Type Code                   617 non-null    object        
 14  Open Date                         1208 non-null   datetime64[ns]
 15  EV Connector Types                1210 non-null   object        
 16  Country                           1210 non-null   object        
 17  Groups With Access Code (French)  1210 non-null   object        
 18  Access Code                       1210 non-null   object        
 19  Facility Type                     562 non-null    object        
 20  EV Pricing                        579 non-null    object        
dtypes: datetime64[ns](2), float64(4), int64(1), object(14)
memory usage: 198.6+ KB

Preprocessing¶

In [7]:
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
In [8]:
#######Let's consider only the J1772 Connector type, since it's the most used and popular##########

virginia['EV Connector Type'] = virginia['EV Connector Types'].map({'J1772': 1})

##### Let's only consider the Hotel, college, shopping mall, parking garae and offices for the facility types. Since, people tend to visit these places often ###############
#############################################################################################################################################################################

virginia['Facility Type'] = virginia['Facility Type'].map({'HOTEL': 1, 'COLLEGE_CAMPUS': 2,'SHOPPING_MALL':3,'PARKING_GARAGE':4, 'OFFICE_BLDG':5})
In [9]:
features = ['Latitude', 'Longitude', 'EV Connector Types', 'Facility Type']
cluster_data = virginia[features]
In [10]:
####################### one-hot encoding for categorical variables ###################################
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(sparse=False)
encoded_features = encoder.fit_transform(virginia[['EV Connector Types', 'Facility Type']])
encoded_feature_names = encoder.get_feature_names_out(['EV Connector Types', 'Facility Type'])


# Combine encoded features with numerical features
features_encoded = pd.DataFrame(encoded_features, columns=encoded_feature_names)
features_encoded[['Latitude', 'Longitude']] = virginia[['Latitude', 'Longitude']]

Transformed DataFrame¶

In [11]:
features_encoded
Out[11]:
EV Connector Types_CHADEMO EV Connector Types_CHADEMO J1772 EV Connector Types_CHADEMO J1772 J1772COMBO EV Connector Types_CHADEMO J1772COMBO EV Connector Types_J1772 EV Connector Types_J1772 J1772COMBO EV Connector Types_J1772 NEMA1450 EV Connector Types_J1772 NEMA515 EV Connector Types_J1772 NEMA520 EV Connector Types_J1772 TESLA ... EV Connector Types_J1772COMBO TESLA EV Connector Types_TESLA Facility Type_1.0 Facility Type_2.0 Facility Type_3.0 Facility Type_4.0 Facility Type_5.0 Facility Type_nan Latitude Longitude
0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 36.910575 -80.317360
1 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 37.551409 -77.452330
2 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 37.543387 -77.429530
3 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 38.809675 -77.122192
4 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 38.899751 -77.460168
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1205 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 38.038041 -78.491176
1206 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 38.856791 -77.112017
1207 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 38.405395 -78.906390
1208 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 36.866183 -76.411139
1209 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 37.155661 -76.562663

1210 rows × 21 columns

In [12]:
features_encoded
Out[12]:
EV Connector Types_CHADEMO EV Connector Types_CHADEMO J1772 EV Connector Types_CHADEMO J1772 J1772COMBO EV Connector Types_CHADEMO J1772COMBO EV Connector Types_J1772 EV Connector Types_J1772 J1772COMBO EV Connector Types_J1772 NEMA1450 EV Connector Types_J1772 NEMA515 EV Connector Types_J1772 NEMA520 EV Connector Types_J1772 TESLA ... EV Connector Types_J1772COMBO TESLA EV Connector Types_TESLA Facility Type_1.0 Facility Type_2.0 Facility Type_3.0 Facility Type_4.0 Facility Type_5.0 Facility Type_nan Latitude Longitude
0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 36.910575 -80.317360
1 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 37.551409 -77.452330
2 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 37.543387 -77.429530
3 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 38.809675 -77.122192
4 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 38.899751 -77.460168
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1205 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 38.038041 -78.491176
1206 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 38.856791 -77.112017
1207 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 38.405395 -78.906390
1208 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 36.866183 -76.411139
1209 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 37.155661 -76.562663

1210 rows × 21 columns

In [13]:
######################################    Setting the number of clusters     ##########################################

### Since my data set is very small, I've directly choosed 8 clusters as a starting poinnt, if we have a huge dataset, 
### we can use elbow plot to do the job for us.

kmeans = KMeans(n_clusters=8)
In [14]:
#############################  Let's asssign the cluster labels to a variable "Cluster"   #############################


virginia['Cluster'] = kmeans.fit_predict(features_encoded)
In [15]:
############################ Seperating Electrify America's stations  from other providers ####################################


electrify_america = virginia[virginia['EV Network'] == 'Electrify America']
other_providers = virginia[virginia['EV Network'] != 'Electrify America']

---------------------------------------------------------------**---------------------------------------------------------------------¶

Identifying clusters where Electrify America is absent among the clustered charging stations.¶

In [16]:
missing_clusters = []
for cluster_id in other_providers['Cluster'].unique():
    cluster_data = other_providers[other_providers['Cluster'] == cluster_id]
    if not any(electrify_america['Cluster'] == cluster_id):
        missing_clusters.append(cluster_id)

Suggesting New Stations¶

In [17]:
###################### Generate suggestions for new Electrify America stations within missing clusters  ########################

suggested_stations = []
for cluster_id in missing_clusters:
    cluster_data = other_providers[other_providers['Cluster'] == cluster_id]
    center_lat = cluster_data['Latitude'].mean()
    center_lon = cluster_data['Longitude'].mean()
    suggested_stations.append((center_lat, center_lon))
In [18]:
print("Suggested new stations for Electrify America:")
for station in suggested_stations:
    print(station)
Suggested new stations for Electrify America:
(37.55729195588232, -77.52949545147378)
(38.8663456309343, -77.27613971807108)
(38.342812017191164, -77.73928477465013)
In [19]:
len(suggested_stations) ### we can increase this number 
Out[19]:
3

---------------------------------------------------------------**---------------------------------------------------------------------¶

Let's do the reverse geo-coding to idenfiy the new location types (school, shopping center, parking lot etc.)¶

In [20]:
#pip install geopy
In [21]:
# suggested_stations['Facility Type']
In [22]:
api_key=""
In [23]:
import geopandas as gpd
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
import numpy as np
from geopy.geocoders import GoogleV3



ea_stations = virginia[virginia['EV Network'] == 'Electrify America'].copy()
other_stations = virginia[virginia['EV Network'] != 'Electrify America'].copy()
In [24]:
############################################# Electrify America's Stations #################################################

ea_stations.head(10)
Out[24]:
Fuel Type Code City State ZIP EV Level2 EVSE Num EV DC Fast Count EV Network Geocode Status Latitude Longitude ... Owner Type Code Open Date EV Connector Types Country Groups With Access Code (French) Access Code Facility Type EV Pricing EV Connector Type Cluster
427 ELEC Bristol VA 24202 NaN 4.0 Electrify America GPS 36.635755 -82.127622 ... NaN 2019-05-17 CHADEMO J1772COMBO US Public public NaN NaN NaN 3
451 ELEC Falls Church VA 22044 NaN 6.0 Electrify America GPS 38.867351 -77.142683 ... NaN 2019-10-19 CHADEMO J1772COMBO US Public public NaN NaN NaN 7
490 ELEC Alexandria VA 22306 1.0 3.0 Electrify America GPS 38.742521 -77.086696 ... NaN 2019-11-14 CHADEMO J1772 J1772COMBO US Public public NaN NaN NaN 7
495 ELEC Vienna VA 22182 NaN 4.0 Electrify America GPS 38.930184 -77.245534 ... NaN 2019-12-07 CHADEMO J1772COMBO US Public public NaN NaN NaN 7
554 ELEC Haymarket VA 20169 NaN 4.0 Electrify America GPS 38.819204 -77.645886 ... NaN 2020-04-30 CHADEMO J1772COMBO US Public public NaN NaN NaN 7
565 ELEC Fredericksburg VA 22407 NaN 4.0 Electrify America GPS 38.293961 -77.512350 ... NaN 2020-07-01 CHADEMO J1772COMBO US Public public NaN NaN NaN 7
572 ELEC Reston VA 20191 NaN 4.0 Electrify America GPS 38.950829 -77.358475 ... NaN 2020-07-09 CHADEMO J1772COMBO US Public public NaN NaN NaN 7
578 ELEC Springfield VA 22150 1.0 3.0 Electrify America GPS 38.775894 -77.172441 ... NaN 2020-07-25 CHADEMO J1772 J1772COMBO US Public public NaN NaN NaN 7
582 ELEC Sterling VA 20166 NaN 4.0 Electrify America GPS 38.977930 -77.425894 ... NaN 2020-08-26 CHADEMO J1772COMBO US Public public NaN NaN NaN 7
583 ELEC Fairfax VA 22030 NaN 4.0 Electrify America GPS 38.861694 -77.275739 ... NaN 2020-08-26 CHADEMO J1772COMBO US Public public NaN NaN NaN 7

10 rows × 23 columns

In [25]:
############################## Below key-value pairs allows us to identify the facility type ###################################

type_mapping = {
    'street_address': 'Street Address',
    'premise': 'Premise',
    'car_repair': 'Car Repair',
    'establishment': 'Establishment',
    'car_dealer': 'Car Dealer'
}
In [27]:
################################ Perform clustering on other EV network stations ###########################################

kmeans = KMeans(n_clusters=10)  # Increased number of clusters to 10
other_stations['Cluster'] = kmeans.fit_predict(other_stations[['Latitude', 'Longitude']])

################################ Finding the cluster centers for other stations  ###########################################

cluster_centers, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_, other_stations[['Latitude', 'Longitude']])

############### Calculating distances from Electrify America stations to cluster centers of other stations #################

ea_distances = pairwise_distances_argmin_min(ea_stations[['Latitude', 'Longitude']], other_stations[['Latitude', 'Longitude']])

########################################### Let's select suggested stations ################################################

################################ Select 15 stations farthest from existing stations ########################################

suggested_indices = np.argsort(ea_distances[1])[-15:]  
suggested_stations = other_stations.iloc[suggested_indices]

################################# Initialize geolocator with Google Maps Geocoding API key #################################

geolocator = GoogleV3(api_key='')

################################## Function to get location details and facility type ######################################

def get_location_details(latitude, longitude):
    location = geolocator.reverse((latitude, longitude), exactly_one=True)
    if location:
        
        ############## Extract facility type from the types field in the geocoding response ##################
        
        facility_type = next(iter(location.raw.get('types', [])), None)
        facility_description = type_mapping.get(facility_type, facility_type)
        return {
            'Location Details': location.address,
            'Facility Type': facility_type
        }
    else:
        return None
    
########## Apply the function to the DataFrame and expand the result into separate columns for existing stations ############

ea_stations[['Location Details', 'Facility Type']] = ea_stations.apply(
    lambda row: pd.Series(get_location_details(row['Latitude'], row['Longitude'])), axis=1)

######## Apply the function to the DataFrame and expand the result into separate columns for suggested stations #############

suggested_stations[['Location Details', 'Facility Type']] = suggested_stations.apply(
    lambda row: pd.Series(get_location_details(row['Latitude'], row['Longitude'])), axis=1)

####################################### EV stations in Virginia using Plotly Express ########################################

fig = px.scatter_mapbox(virginia, lat='Latitude', lon='Longitude', color='EV Network',
                        color_discrete_map={'Electrify America': 'red', 'Other Providers': 'blue'},
                        hover_data={'EV Network': True, 'Facility Type': True},  # Add 'Facility Type' to hover data
                        mapbox_style='carto-positron', zoom=6, center={'lat': 38.0037, 'lon': -79.4588})

########################## Adding layout to the existing Electrify America stations #########################################

fig.add_trace(go.Scattermapbox(
    lat=ea_stations['Latitude'],
    lon=ea_stations['Longitude'],
    mode='markers',
    marker=dict(size=10, color='red'),
    name='Existing Electrify America Stations',
    hoverinfo='text',
    text=['<b>EV Network</b>: {}<br><b>Facility</b>: {}<br><b>Location Details</b>: {}'.format(ev_network, facility_type, location_details)
          for ev_network, facility_type, location_details in zip(ea_stations['EV Network'], ea_stations['Facility Type'], ea_stations['Location Details'])]
))

########################## Adding layout to the suggested Electrify America stations #########################################

fig.add_trace(go.Scattermapbox(
    lat=suggested_stations['Latitude'],
    lon=suggested_stations['Longitude'],
    mode='markers',
    marker=dict(symbol='circle', size=10, color='blue'),# Change symbol to circle and color to blue
    name='New Electrify America Stations',
    hoverinfo='text',
    text=['<b>EV Network</b>: {}<br><b>Facility</b>: {}<br><b>Location Details</b>: {}'.format(ev_network, facility_type, location_details)
          for ev_network, facility_type, location_details in zip(suggested_stations['EV Network'], suggested_stations['Facility Type'], suggested_stations['Location Details'])]
))

# Updating layout
fig.update_layout(title='EV Stations in Virginia', margin={"r": 0, "t": 30, "l": 0, "b": 0})
fig.show()
C:\Users\Harinath\AppData\Local\Temp\ipykernel_720\2524339197.py:49: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [ ]:
 
In [ ]: